# join只能对K-V值操作,类似sql中的join,也能实现内,左,右关联
#coding:utf8
from pyspark import SparkContext,SparkConf

if __name__ == '__main__':
    # Local-mode Spark context for the join demo.
    conf = SparkConf().setAppName("test").setMaster("local[*]")
    sc = SparkContext(conf=conf)

    try:
        # Employee RDD: (employee_id, name).
        rdd1 = sc.parallelize([(1001, "张三"), (1002, "李四"), (1003, "王五"), (1004, "赵六")])
        # Department RDD: (employee_id, department) — only 1001 and 1002 have one.
        rdd2 = sc.parallelize([(1001, "销售部"), (1002, "科技部")])

        # join() pairs records automatically by key (like SQL JOIN on K-V RDDs).
        # Inner join: only keys present in BOTH RDDs (1001, 1002).
        print(rdd1.join(rdd2).collect())
        # Left outer join: every key of rdd1; employees without a department
        # get None as the rdd2 side of the value pair.
        print(rdd1.leftOuterJoin(rdd2).collect())
        # Right outer join: every key of the RIGHT operand (rdd1 here), so all
        # employees appear; missing rdd2 values become None on the left of the
        # value pair.
        print(rdd2.rightOuterJoin(rdd1).collect())
    finally:
        # Fix: the original never released the SparkContext. Stop it so the
        # local executors and the driver UI port are freed even if a job fails.
        sc.stop()